!  First process on the CRSP Compustat - merged data created with SA
! 2/22/16  -- I am now using the late Feb 2016 data format.
!   Note that this code sets up full data matrices N X T  -- for all variables
! Module SHARE is not visible in this file.  With IMPLICIT NONE in force, the names used further down
! without a local declaration (zero, one, REGN, REGK, X, Y, beta, ressd) presumably come from it -- TODO confirm.
USE SHARE
IMPLICIT NONE
! Company-by-month (N x T) panels filled from unit 18.  -99 marks a missing cell.
!   RET = monthly return, SZ = abs(price) * shares, BV = 1000 * input book value.
DOUBLE PRECISION, DIMENSION(26000,900) :: RET,SZ,BV
! Monthly index series read from unit 15.  Only EWRETD is used again in the visible code (as the regressor);
! CRSVAL is never referenced.
DOUBLE PRECISION, DIMENSION(1100) :: VWRETD,VWRETX,EWRETD,EWRETX,SPRTN,CRSVAL,USDVAL
! BVDAT  = the date field (DDD) that arrives with the book value on each input row.
! FININD = 1 when the row's SIC code is in 6000-6999.
INTEGER, DIMENSION(26000,900) :: BVDAT,FININD
! TSHO   = permnos read from unit 19; input rows carrying one of them are skipped.
! PERMNO = permno of each retained company, in order of first appearance.
! ISTART = calendar index of a company's first row.  IEND(k) is set when company k+1 begins and is not
!          read anywhere in the visible code.
INTEGER, DIMENSION(26000) :: TSHO,PERMNO,ISTART,IEND
! MDATE = master calendar dates from unit 15; TOTCNT and USDKNT are read alongside it and not used again here.
INTEGER, DIMENSION(1100) :: MDATE,TOTCNT,USDKNT
! MPN = permnos listed under one permco (unit 20); imtch = their row indices in the panels.  XDATE is unused.
INTEGER, DIMENSION(100) :: MPN,imtch,XDATE
! Scalar work variables.  Many of the names below are not referenced in the visible code.
DOUBLE PRECISION :: R,DR,DIV,at,l1,p1,p2,up,tax,compso,mom,MV,PRC,lt,bm,bs,aggs,rgar2,ssm,lsz,rgar3
DOUBLE PRECISION :: PREF,INPBV
INTEGER :: P,PC,D,SC,DD,DC,NP,HE,DIC,EX,DDD,CUS,lp,TM,IM,NG,il,ik,nobs,fyr,mpnos,ig,RETAIN,S
INTEGER :: SMATCH,i,it,errcod,NTS,status,ITC,siccd,gv,LITS,ITS,ipre,ipost,npc,npn,ITB,ITA,MPC,iubv,igar3
INTEGER :: ja,iuf,j,ia,ib,ial,lreta
! GAR receives the discarded leading records of the index file; gar2 and gar3 are unused.
CHARACTER * 4 :: GAR,gar2,gar3
! Open the four input files for reading and the two output files, then set the shared constants.
!   ACTION='READ' is the standard-conforming spelling of the vendor-specific READONLY keyword.
!   Each input OPEN returns its status in errcod.  The status is now checked, so a missing file stops the
!   run here instead of showing up later as an empty exclusion list or a read on an unopened unit.
OPEN(unit=15, file='/Users/lamlap/BSV/2022/data/IndexInfo.csv', STATUS ='OLD',ACTION='READ',iostat= errcod)
IF (errcod /= 0) THEN
   PRINT '(a,i0)', ' Cannot open IndexInfo.csv.  iostat = ',errcod
   STOP 15
END IF
OPEN(unit=18, file='/Users/lamlap/BSV/2022/data/mergeSCAB22.dat', STATUS ='OLD',ACTION='READ',iostat= errcod)
IF (errcod /= 0) THEN
   PRINT '(a,i0)', ' Cannot open mergeSCAB22.dat.  iostat = ',errcod
   STOP 18
END IF
OPEN(unit= 19, file='/Users/lamlap/BSV/2022/data/EXCLUDEfueALLSIC22.dat', STATUS='OLD',ACTION='READ',iostat= errcod)
IF (errcod /= 0) THEN
   PRINT '(a,i0)', ' Cannot open EXCLUDEfueALLSIC22.dat.  iostat = ',errcod
   STOP 19
END IF
OPEN(unit=20, file='/Users/lamlap/BSV/2022/data/MULTPNS22.dat', STATUS ='OLD',ACTION='READ',iostat= errcod)
IF (errcod /= 0) THEN
   PRINT '(a,i0)', ' Cannot open MULTPNS22.dat.  iostat = ',errcod
   STOP 20
END IF
! The output files carry no iostat, so a failure to create them aborts with the runtime's own message.
OPEN(unit=21, file='/Users/lamlap/BSV/2022/data/BSVUSEALLSIC22.dat',STATUS='REPLACE')
OPEN(unit=22, file='/Users/lamlap/BSV/2022/data/NEGRETS22.dat',STATUS='REPLACE')
zero=0.0d0
one=1.0d0
! A reminder of how my current codes are structured to apply BSV:
!    Each row contains the permno, date, and return (from CRSP) for that month
!         plus the characteristics applicable to that month:
!             BtM lagged 6 - 18 months
!             Size lagged 2 months
!             Momentum (12 months, lagged 2 months)
!             beta on the CRSP VWMR from the past 60 months
!             return from the same month last year
!             average return from the same month for the past 5 years
!
!    At this point, (and especially as I'm not doing anything special with dividends and/or splits), I am simply using the
!            return directly from CRSP (in 6 significant digits).
!
!    Irregularities that we see on input:
!                                         1) Multiple permnos for 1 permco.
!                                            Compute the size by multiplying each permno's prc * shrout on CRSP, and summing up.
!                                            Keep the compustat data for the permno that has the longest match.
!                                         2) Multiple rows with the same date because of distributions.
!      ==> This has been handled in earlier programs.
!                                         3) Possibly missing Compustat data and/or a switch back to an earlier Compustat date
!                                            because of a change in fiscal year end.
!
!   Read in the master calendar and index file. Month 1 is Jan 1955.
!
!  NTS=11609   ! This is the number of financials, utilities, and permnos with < 50 observations.
!   NTS=8939   -- Now this is determined automatically.
! Read the exclusion list (one permno per record on unit 19) into TSHO and count the entries in NTS.
!   Each value is read into the scalar P first and stored only after the capacity check, so a list
!   longer than TSHO stops the run instead of writing past the end of the array.  P is free to serve as
!   scratch here: the main read loop reads a fresh value into it before using it.
!   Any non-zero read status (end of file or a read error) ends the list.
NTS=0
DO
   READ (19,196,iostat=status) P
   196 format (i5)
   IF (status /= 0) EXIT
   IF (NTS >= SIZE(TSHO)) THEN
      PRINT '(a,i0,a)', ' The exclusion list holds more than ',SIZE(TSHO),' permnos -- increase the dimension of TSHO.'
      STOP 196
   END IF
   NTS=NTS+1
   TSHO(NTS)=P
END DO
! Read the master calendar and the index series from unit 15.
!   The first NG records are read into GAR and discarded, so that the first record kept becomes month 1.
!   Each later record supplies the date and index values for one month; the two fields read into
!   rgar2 and rgar3 are not used afterwards.  TM ends up as the number of months read.
!   Three guards are added: the read index may not run past the index arrays, the calendar may not be
!   empty, and TM may not exceed the month dimension of the N x T panels filled later.
NG=349
DO i=1,NG
   READ (15,*)GAR
END DO
IM=0
DO
   IM=IM+1
   IF (IM > SIZE(MDATE)) THEN
      ! The loop needs one slot beyond the last data row to detect the end of the file.
      PRINT '(a,i0,a)', ' IndexInfo.csv holds at least ',SIZE(MDATE), &
                        ' monthly rows, which fills the index arrays -- increase their dimension.'
      STOP 151
   END IF
   READ (15,*,iostat=status) MDATE(IM),VWRETD(IM),VWRETX(IM),EWRETD(IM),ewretx(IM),SPRTN(IM),rgar2,rgar3,totcnt(IM),&
                             & USDVAL(IM),USDKNT(IM)
   IF (status /= 0) EXIT
END DO
TM=IM-1   !  The total number of months in the pre- and use- samples.
IF (TM < 1) THEN
   PRINT '(a)', ' No monthly rows were read from IndexInfo.csv.'
   STOP 152
END IF
IF (TM > SIZE(RET,2)) THEN
   PRINT '(a,i0,a,i0,a)', ' The calendar has ',TM,' months but the data arrays hold only ',SIZE(RET,2), &
                          ' -- increase their second dimension.'
   STOP 153
END IF
! Counters used by the read loop that follows.
LP=0
ITC=0
NOBS=0
PRINT 7685, TM,MDATE(1),MDATE(TM)
7685 format (' index read.  Months: ',i3,' Beg: ',i8,' end: ',i8)
! Read the merged CRSP/Compustat rows from unit 18 and scatter them into the N x T panels.
!   Rows are expected grouped by permno: a permno different from the previous row's starts a new company.
!   Changes relative to the earlier version of this loop:
!     * The calendar lookup now reports failure.  Before, a date missing from MDATE left ITS at its
!       previous value, so the row silently overwrote another month's cell.  Such rows are now skipped
!       with a message.
!     * FININD is initialised to 0 for every month of a new company.  Before, only the first month and
!       the financial rows were ever assigned, leaving the other cells undefined.
!     * The company counter is checked against the array size before it is advanced.
!     * The store logic, previously duplicated in the new-company and continuation branches, is shared.
DO
   READ (18,1866,iostat=status) P,PC,D,R,PRC,S,SICCD,DDD,INPBV
   1866 format (2(i5,1x),i8,1x,f11.6,1x,f14.5,1x,i10,2x,i4,1x,i8,1x,f14.5)
   IF (status /= 0) EXIT
   ! Get rid of the firms with too few observations (a priori) and in financial and utility industries.
   IF (ANY(TSHO(1:NTS) == P)) CYCLE
   ! Locate this row's date in the master calendar.  ITS = 0 means "not found".
   ITS=0
   DO ita=1,TM
      IF (MDATE(ita) == D) THEN
         ITS=ita
         EXIT
      END IF
   END DO
   IF (ITS == 0) THEN
      PRINT '(a,i5,a,i8,a)', ' permno ',P,' date ',D,' is not in the master calendar -- row skipped.'
      CYCLE
   END IF
   IF (P/=LP) THEN
      ! A new company: register it and mark every month as missing.
      IF (ITC >= SIZE(PERMNO)) THEN
         PRINT '(a,i0,a)', ' More than ',SIZE(PERMNO),' companies on input -- increase the first array dimension.'
         STOP 1801
      END IF
      ITC=ITC+1               ! Keeps track of the number of companies.
      PERMNO(ITC)=P
      LP=P
      PRINT 8976, ITC,PERMNO(ITC)
      8976 format (' ITC: ',i5,' permno: ',i5)
      ISTART(ITC)=ITS
      IF (ITC > 1) THEN
         IEND(ITC-1)=LITS     ! LITS still holds the last month stored for the previous company.
      END IF
      DO j=1,TM
         RET(itc,j)=-99.d0    ! My base data arrays are N by T
         SZ(itc,j)=-99.d0
         BV(itc,j)=-99.d0
         BVDAT(itc,j)=-99
         FININD(itc,j)=0
      END DO
   END IF
   ! Store this row, for a new or a continuing company alike.
   LITS=ITS
   RET(itc,its)=R
   BVDAT(itc,its)=DDD
   IF (PRC < -999999.d0 .or. S <= 0) THEN   ! The missing price code is -9999999.0
      SZ(itc,its)=-99.d0
   ELSE
      SZ(itc,its)=abs(PRC)*S
!       PRINT 7691, itc,its,prc,s,sz(itc,its)
!       7691 format (' itc ',i5,' its ',i3,' prc ',f12.5,' s ',i10,' sz ',f12.5)
   END IF
   IF (inpbv > -99.d0) THEN
      BV(itc,its)=1000.0d0*inpbv       ! Compustat data is in millions and CRSP in thousands.
   END IF
   IF (SICCD >= 6000 .AND. SICCD <= 6999) THEN
      PRINT 8886, itc,its,SICCD
      8886 format (/' 8886 itc ',i5,' its ',i3,' SIC code ',i4//)
      FININD(itc,its)=1
   END IF
END DO
!   Since I don't use IEND(ITC) the last company will be fully read-in at this point.
!         No need to treat it separately.
!
!   I now have an array with all usable data and some unusable data.
!
PRINT 61, ITC; 61 format (' Read in complete.  Preliminary array set up. ',i5,' unique permnos.')
!
!  Now let's ``combine'' the cases where there are multiple permnos under the same permco.
!      This has several parts:
!                               1) Identify which series to use for returns
!                               2) Combine the accounting data across the classes.
!                               3) Eliminate the other series (set all returns to missing).
!     Hubbell is an example.  Both classes have full CRSP data (valid returns from Aug 1962 through Dec 2014).  The B class shares Compustat data starts on
!                             19621231, while the A class Compustat data starts on 19821231.
!     But there is also the possibility that the classes may not fully overlap.  I think it's acceptable to keep the one with the largest market cap in
!         each month --of all the classes with valid returns in that month.  
!         This is not fully consistent with measurability since this is measured at the end of the month.   But I don't think it's a problem for obvious reasons.
!
! Collapse the permnos that share a permco into a single company row.
!   Unit 20 holds, for each permco, a header record (permco, number of permnos) followed by one record
!   per permno.  For every month:
!     * the return kept is that of the class with the largest size among the classes with a valid return
!       (if none has a valid return, the class kept in the previous month is used again);
!     * the size is the sum of the sizes of the classes with a valid return and a valid size;
!     * book value, its date and the financial flag come from the last listed class with a positive
!       book value, defaulting to the first.
!   The result is stored in the row of the first matched permno; the other rows are set to missing.
!   Changes relative to the earlier version of this loop:
!     * Only permnos actually present in PERMNO are collected (npc counts them; imtch is filled
!       compactly).  Before, a permno that was not found left imtch(j) holding an index from an earlier
!       permco, or no value at all, so unrelated companies could be merged or the panels indexed out of
!       range.
!     * NPN is checked against the size of MPN/imtch, and the permno records are read with iostat.
!   npc is an integer declared at the top of the program and otherwise unused.
MPNOS=0
DO
   READ (20,2066,iostat=status) MPC,NPN   ! This is the permco and the number of permnos associated with it.
   2066 format (i5,2x,i3)
   IF (status /= 0) EXIT
   MPNOS=MPNOS+1
!    PRINT 2676, i,MPC; 2676 format (' i ',i3,' MPC: ',i5)
   IF (NPN > SIZE(MPN)) THEN
      PRINT '(a,i5,a,i0,a,i0,a)', ' permco ',MPC,' lists ',NPN,' permnos; at most ',SIZE(MPN),' are supported.'
      STOP 2001
   END IF
   npc=0
   DO j=1,NPN
      READ (20,2067,iostat=status) MPN(j)
      2067 format (2x,i5)
      IF (status /= 0) THEN
         PRINT '(a,i5,a)', ' MULTPNS22.dat ends or is unreadable inside the permno list of permco ',MPC,'.'
         STOP 2002
      END IF
      DO ia=1,ITC
         IF (PERMNO(ia) == MPN(j)) THEN
            npc=npc+1
            imtch(npc)=ia
            EXIT
         END IF
      END DO
   END DO
   ! None of this permco's permnos is in the panels (for example, all were on the exclusion list).
   IF (npc == 0) CYCLE
!   At this point the permnos of this permco that are in the panels are indexed by imtch(j) (j = 1, . . . , npc)
!      The objective is to get a single company.
   RETAIN=imtch(1)        ! Class whose return is kept; carried from month to month until a better one appears.
   DO it=1,TM
      bs=zero
      AGGS=zero
      IUBV=imtch(1)
      DO j=1,npc
         IF (BV(imtch(j),it)>zero) IUBV=imtch(j)
         IF (ret(imtch(j),it) >-9.d0) THEN
            IF (SZ(imtch(j),it) >bs) THEN
               bs=SZ(imtch(j),it)
               retain=imtch(j)
            END IF
            IF (SZ(imtch(j),it) > -9.d0) THEN
               AGGS=AGGS+SZ(imtch(j),it)
            END IF
         END IF
      END DO
      RET(imtch(1),it)=RET(retain,it)           !  We'll keep the first permno and 0-out all the others.
      SZ(imtch(1),it)=AGGS                      !  This is the company's market value of equity (0 if no class qualified).
      BV(imtch(1),it)=BV(IUBV,it)
      BVDAT(imtch(1),it)=BVDAT(IUBV,it)
      FININD(imtch(1),it)=FININD(IUBV,it)
      DO j=2,npc
         RET(imtch(j),it)=-99.0d0
         SZ(imtch(j),it)=-99.0d0
         BV(imtch(j),it)=-99.0d0
      END DO
   END DO
END DO
!
! Finished with the multiple class cases.
!
! Next we:
!          1.  Compute the beta and residual variance from the preceding 60 months of returns
!          2.  line up size from 2 months prior
!          3.  Obtain size from the month that matches to the data date of the book value -- the variable of interest is ln(1 + B/M).
!          4.  Use last month's size to construct the market weight.
!          5.  Compute aggregate return from Month -13 to -2 as momentum.
!          6.  Construct the same month return from preceding year and average over past 5 years.
!  At this point if any of the data needed for any of these calculations is missing, the stock will not be in the investment opportunity set.    <--------*********
!  I will write out the return at t and the characteristics at t-1 to a file.
!
!  I won't store anything here.  Thus, we will have to read the output file before:
!                                                                                   -- constructing the market weights.
!                                                                                   -- standardizing and normalizing the characteristics.
!
! For every month it >= 61 and every company, compute the characteristics and write one output row.
!   A company is used in month it only if all 60 returns in months it-60 .. it-1 are present, its book
!   value in month it is positive and its size in month it-2 is positive.
!   REGN, REGK, X, Y, beta, ressd and OLS are not declared in this file; presumably they come from
!   module SHARE, with OLS regressing Y on X and setting beta and ressd -- TODO confirm.
!   NOTE(review): the regressor loaded into X(:,2) is ewretd, while the header comment near the top of
!   the file speaks of the CRSP VWMR.  Confirm which market proxy is intended.
!   Changes relative to the earlier version of this loop:
!     * Format 7651 printed the company index with i4 although the index is printed with i5 everywhere
!       else and can exceed 9999; it now uses i5.
!     * A dead "ik=0" before CALL OLS is removed, and the nested IFs are replaced by guard clauses.
REGN=60
REGK=2
DO ik=1,REGN
   X(ik,1)=one                                 ! Intercept column.
END DO
DO it=61,TM                                 ! Loop over the months.   it is the loop variable.
   PRINT 6166, it
   6166 format (' month ',i3)
   ! The regressor for month it: the index return over the preceding 60 months.
   ik=0
   DO il=it-60,it-1
      ik=ik+1
      X(ik,2)=ewretd(il)
   END DO
   DO j=1,ITC                                       ! Loop Over the Companies   j is the loop variable.
      ib=0
      ik=0
      PRINT 6167, j
      6167 format (' company ',i5)
      ! Load the company's 60 preceding returns; ib=1 flags a missing one.
      DO il=it-60,it-1
         IF (ret(j,il)<-9.d0) THEN               ! Skip this company in month it.
            ib=1
            EXIT
         END IF
         ik=ik+1
         Y(ik)=ret(j,il)
      END DO
      IF (ib /= 0) CYCLE
      IF (.NOT. (BV(j,it)>zero .and. sz(j,it-2)>zero)) CYCLE
      ! Require a month in it-24 .. it-4 that is on or after the book-value date and has a valid size.
      ! The month found is used only as an existence check: BM itself is built from the size in month it-2.
      SMATCH=0
      DO ial=it-24,it-4
         IF (MDATE(ial) >= BVDAT(j,it) .AND. SZ(j,ial) > zero) THEN
!             BM=log(1.0d0+BV(j,it)/SZ(j,ial))                       ! Should match the date when we measure BV to the same date for MV
                                                                     ! Possible exception is if the FY Date is greater than the last trading day in that month.
            BM=log(1.0d0+BV(j,it)/SZ(j,it-2))
            SMATCH=1
!             PRINT 4321, it,ial,BV(j,it),SZ(j,ial); 4321 format (' it ',i3,' ial ',i3,' bv ',f15.4,' sz ',f15.4)
            EXIT
         END IF
      END DO
      IF (SMATCH /= 1) THEN
         PRINT 7651, it,j,PERMNO(j)
         7651 format (' Month ',i3,' company ',i5,' no match for the bvdat',2x,i5)
         STOP 7652
      END IF
      CALL OLS
      PRINT 7654, it,j,beta
      7654 format (' after ols  Time: ',i3,' company ',i5,' beta',2(2x,f12.6))
      ! Momentum: compounded return over months it-13 .. it-2.
      mom=one
      DO il=it-13,it-2
         mom=mom*(1.0d0+ret(j,il))
      END DO
      mom=mom-one
      ! Average same-month return over past 5 years.
      SSM=zero
      DO ja=1,5
         SSM=SSM+ret(j,it-12*ja)
      END DO
      SSM=.2d0*SSM
      LSZ=log(sz(j,it-2))
      ! ret(j,it) and sz(j,it-1) are written as they are; neither is tested for the missing code here.
      WRITE (21,2166) PERMNO(j),it,ret(j,it),beta(2),ressd,lsz,BM,mom,ssm,ret(j,it-12),sz(j,it-1),finind(j,it)
      2166 format (i5,2x,i3,1x,f12.6,1x,f9.6,1x,f11.6,1x,f12.6,1x,f14.6,1x,f12.6,1x,f11.6,1x,f11.6,1x,f17.4,1x,i1)
!       IF (PERMNO(j) == 10007) THEN
!       IF (PERMNO(j) == 32934 .OR. PERMNO(j) == 32942) THEN
      ! Returns below -100% (which includes the -99 missing code) are also logged to unit 22.
      IF (ret(j,it) < -1.0d0) THEN
         WRITE (22,2266) PERMNO(j),it,ret(j,it),beta(2),ressd,lsz,BM,mom,ssm,ret(j,it-12),sz(j,it-1),BV(j,it),BVDAT(j,it)
         2266 format (i5,2x,i3,1x,f12.6,1x,f9.6,1x,f11.6,1x,f12.6,1x,f14.6,1x,f12.6,1x,f11.6,1x,f11.6,1x,f17.4,2x,f12.5,2x,i8)
      END IF
      PRINT 2167, it,ret(j,it),beta(2),ressd,sz(j,it-2),BM,mom,ssm,ret(j,it-12)
      2167 format (2x,i3,1x,f12.6,1x,f9.6,1x,f11.6,1x,f12.6,1x,f14.6,1x,f12.6,1x,f11.6,1x,f11.6)
   END DO
END DO
STOP; END
